#!/usr/bin/python
# --coding:utf-8--


# Use etree.parse() to read and parse documents from files on disk.
from lxml import etree

# Parse the external file case_demo.html.
# NOTE: no parser is passed, so lxml falls back to its default (XML) parser;
# this step therefore relies on case_demo.html being well-formed markup.
html = etree.parse('case_demo.html')
# tostring() yields bytes for a byte encoding. Decode before printing so the
# pretty-printed markup is shown as text instead of a bytes repr (b'...')
# with escaped newlines on Python 3 -- same approach as the douban.html step.
result = etree.tostring(html, pretty_print=True, encoding='utf-8').decode('utf-8')

print(result)

print("=============================")
# Parse douban.html with an explicit HTML parser that reads the input as UTF-8.
parser = etree.HTMLParser(encoding='utf-8')
html_element = etree.parse('douban.html', parser=parser)
# Serialize to UTF-8 bytes, then decode to text for readable console output.
res = etree.tostring(html_element, pretty_print=True, encoding='utf-8').decode('utf-8')
print(res)
